Now it is time to read in the data and get into a workable format for our problem
# read in the data
df <- read.csv('Data/NEISS/sport_category_final.csv')
# create a contigency table
orig_table <- table(df$prod1,df$body_part)
# create a datafame from the contigency table (at least one that is intuitive)
tm<-as.data.frame.matrix(orig_table) # tm stands for table matrix
and some further cleanup. This time, we need to ensure that all of the classifications of the original data belong to the same categories of that in the Berger and Trinkaus paper.
head_neck <- tm$face + tm$neck + tm$head
shoulder_arm <- tm$`upper arm` + tm$`lower arm` + tm$shoulder + tm$elbow
hand <- tm$hand + tm$finger + tm$wrist
pelvis <- tm$`pubic region` + tm$hip
leg <- tm$knee + tm$`lower leg` + tm$`upper leg`
foot <- tm$foot + tm$toe + tm$ankle
trunk <- tm$`upper trunk` + tm$back
Now let’s wrap this all into a final dataframe that we will use throughout the rest of this analysis.
final<-as.data.frame(cbind(head_neck,shoulder_arm,hand,pelvis,leg,foot,trunk))
rownames(final)<-rownames(tm) # index will be the activity name
Grabbing the values off of the Berger and Trinkaus paper is the first task
sample1<-c(8,4,7,1,1,3,3) # total sample
sample2<-c(7,4,7,1,0,3,1) # without djd
sample3<-c(6,4,5,1,1,3,1) # without shandidar 1
sample4<-c(5,4,5,1,0,3,0) # without djd or shandidar 1
Now it is time to run the actual analysis. We are going to create some tables the involve running a chi square test on all of the samples against all of the activities as well as calculating Cramer’s V.
# total sample
nt<-t(apply(final,1,function(x) {
new<- cbind(sample1,x)
ch <- chisq.test(new)
chi<-c(unname(ch$statistic), ch$p.value)
cram<-CramerV(new)
cbind(chi,cram)
}))
# sample with out djd
nwd<-t(apply(final,1,function(x) {
new<- cbind(sample2,x)
ch <- chisq.test(new)
chi<-c(unname(ch$statistic), ch$p.value)
cram<-CramerV(new)
cbind(chi,cram)
}))
# sample without shandidar
nws<-t(apply(final,1,function(x) {
new<- cbind(sample3,x)
ch <- chisq.test(new)
chi<-c(unname(ch$statistic), ch$p.value)
cram<-CramerV(new)
cbind(chi,cram)
}))
# sample without shandidar or djd
nwsd<-t(apply(final,1,function(x) {
new<- cbind(sample4,x)
ch <- chisq.test(new)
chi<-c(unname(ch$statistic), ch$p.value)
cram<-CramerV(new)
cbind(chi,cram)
}))
Create a function to clean up some vital information about the Chi Square test
chi2cleanup<-function(table){
# read in one of the chi square tables (nt, nwd, nws or nwsd)
frame<-as.data.frame(table,row.names = rownames(table))
frame<-frame[-c(4)] # drop the extra p value column
names(frame) = c('X2','P-Value','Cramers V')
fin = na.omit(frame)
# create two callable objects, 1) the rows that have NAs and 2) the final data frame for some final manipulation
c<- list(
rowna=frame[is.nan(frame$X2),], # $rowna
final=fin, # $final
similar=fin[fin$`P-Value` > .05,] # $similar (activities that are similar, P > 0.05)
)
return(c)
}
# name cleaned up chi square tables
n_tot<-chi2cleanup(nt) # neanderthal total
n_djd<-chi2cleanup(nwd) # neanderthal w/o djd
n_s<-chi2cleanup(nws) # neanderthal w/o shan
n_sd<-chi2cleanup(nwsd) # neanderthal w/o shan or djd
Find similar activities per sample
n_tot$similar # neanderthal total
## X2 P-Value Cramers V
## altar 3.576823 0.73372229 0.2293476
## banisters 11.100461 0.08532075 0.4265850
## benches 8.976722 0.17489146 0.2781826
## bleachers 10.958421 0.08967001 0.2658938
## diving board 7.396248 0.28575071 0.2616941
## flying discs or boomerangs 11.355433 0.07799320 0.2944192
## golf 11.749776 0.06778954 0.2181053
## golf carts 9.972497 0.12581503 0.2179176
## lawn chair 10.638039 0.10022845 0.3791534
## loading docks 11.923561 0.06369594 0.4014090
## poles 11.112463 0.08496219 0.3553561
## roller hockey 9.546665 0.14508720 0.4022535
## snow tubing 10.748946 0.09645243 0.3946921
## swimsuit 7.175379 0.30493252 0.2165592
## water tubing 2.973634 0.81214870 0.1667062
## windsurfing 12.534779 0.05104817 0.2221475
n_djd$similar # neanderthal w/o djd
## X2 P-Value Cramers V
## altar 4.682090 0.58518450 0.27047673
## baseball 6.979352 0.32276095 0.04624864
## benches 9.566742 0.14412345 0.29226244
## bleachers 11.207769 0.08216346 0.27244015
## diving board 10.945581 0.09007295 0.32441635
## flying discs or boomerangs 11.922667 0.06371642 0.30639724
## golf carts 12.067448 0.06048094 0.24203273
## paddle ball 11.052587 0.08676471 0.36939368
## poles 8.549072 0.20057074 0.31902142
## roller hockey 8.292168 0.21747021 0.38828687
## swimsuit 9.449321 0.14984055 0.25182983
## water tubing 5.801308 0.44581190 0.23732547
n_s$similar # neanderthal w/o shan
## X2 P-Value Cramers V
## altar 1.972567 0.92220419 0.17836929
## banisters 11.173352 0.08316429 0.45072369
## baseball 6.915363 0.32874544 0.04605026
## benches 6.139257 0.40777277 0.23624442
## bleachers 5.866440 0.43831681 0.19842399
## cheerleading 11.380472 0.07730557 0.17192926
## diving board 7.126857 0.30927482 0.26433150
## flying discs or boomerangs 7.098503 0.31183373 0.23830238
## golf 7.484431 0.27835805 0.17622636
## golf carts 6.657463 0.35368900 0.18065056
## lawn chair 10.363262 0.11016446 0.39038559
## loading docks 8.473317 0.20543589 0.35299802
## mountain/all-terrain biking 10.561468 0.10291280 0.21713915
## paddle ball 9.776645 0.13437860 0.35178829
## patios/flooring 9.538668 0.14547269 0.23899324
## poles 6.966236 0.32398093 0.29146887
## roller hockey 5.897077 0.43481796 0.33356498
## snow tubing 9.206119 0.16231362 0.38226806
## swimming pools (not specified) 9.871133 0.13018486 0.23288837
## swimsuit 6.633137 0.35610991 0.21242265
## water tubing 2.910867 0.81994803 0.16976590
## windsurfing 9.849892 0.13111741 0.19929201
n_sd$similar # neanderthal w/o shan or djd
## X2 P-Value Cramers V
## altar 4.315178 0.63410647 0.27044152
## baseball 6.189503 0.40230034 0.04358653
## benches 8.090566 0.23154305 0.27497775
## bleachers 7.957986 0.24119759 0.23346681
## cheerleading 11.047118 0.08693106 0.17005634
## diving board 11.526266 0.07341106 0.34121391
## flying discs or boomerangs 9.385163 0.15304725 0.27735819
## golf 9.848193 0.13119225 0.20341819
## golf carts 9.484323 0.14811598 0.21722266
## lawn chair 11.823483 0.06602455 0.42649705
## loading docks 10.444960 0.10712155 0.40086369
## mountain/all-terrain biking 11.766333 0.06738931 0.23074082
## paddle ball 9.957890 0.12643658 0.36197358
## patios/flooring 10.016769 0.12394763 0.24713935
## poles 7.326147 0.29173540 0.30452592
## snow tubing 12.332112 0.05495688 0.45335991
## swimsuit 9.999291 0.12468188 0.26351380
## water tubing 6.295139 0.39095341 0.25344844
Lets look at what Rodeo Riders look like in comparison to the Neanderthal samples
n_tot$final['rodeo',] # neanderthal total
## X2 P-Value Cramers V
## rodeo 23.62703 0.000611504 0.2198114
n_djd$final['rodeo',] # neanderthal w/o djd
## X2 P-Value Cramers V
## rodeo 22.25172 0.001090031 0.2141958
n_s$final['rodeo',] # neanderthal w/o shan
## X2 P-Value Cramers V
## rodeo 16.68168 0.01052714 0.1858431
n_sd$final['rodeo',] # neanderthal w/o shan or djd
## X2 P-Value Cramers V
## rodeo 16.43343 0.01160729 0.1850306
similarSelector<-function(frame,final_frame,neander_sample){
require(reshape)
require(plyr)
indices=rownames(frame$similar) # find rows to set as indices
new_rows = final_frame[indices,] # map those to original contigency table
sample =append(c("neander","rodeo"),rownames(new_rows)) # add a new vector to use a "sample" column
rodeo = final_frame['rodeo',] # add rodeo riders
joined_rows = rbind(neander_sample,rodeo,new_rows) # join rows from neanderthal sample to new dataframe
props = prop.table(as.table(as.matrix(joined_rows)),1)
props<-as.data.frame.matrix(props)
joined_cols = cbind(sample,props) # add the "sample" column
rownames(joined_cols) = NULL # remove the row indices
melted <- melt(joined_cols, id=(c("sample"))) # transpose contigency table
return(melted)
}
# an example of how similarSelector works
simToNeanderTotal<-similarSelector(n_tot,final,sample1)
simToNeanderWOdjd<-similarSelector(n_djd,final,sample2)
simToNeanderWOShan<-similarSelector(n_s,final,sample3)
simToNeanderWOdjdOrShan<-similarSelector(n_sd,final,sample4)
Find jus the neanderthals in the above dataframes
# extract just the neanderthal row for emphasis in plots
n2<-simToNeanderTotal[simToNeanderTotal$sample=='neander',]
d2<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='neander',]
s2<-simToNeanderWOShan[simToNeanderWOShan$sample=='neander',]
sd<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='neander',]
# extract just the rodeo riders for emphasis in plots
n3<-simToNeanderTotal[simToNeanderTotal$sample=='rodeo',]
d3<-simToNeanderWOdjd[simToNeanderWOdjd$sample=='rodeo',]
s3<-simToNeanderWOShan[simToNeanderWOShan$sample=='rodeo',]
sd3<-simToNeanderWOdjdOrShan[simToNeanderWOdjdOrShan$sample=='rodeo',]
Plot those that are similar
plotMaker<-function(total_frame,neander_frame,rodeo_frame){
ggplot(data=total_frame, # this needs to be one of the dataframes directly above
aes(x=factor(variable), y=value,
group=sample,
color=sample)) +
geom_line() +
geom_point() +
geom_point(data=neander_frame,aes(x=factor(variable), y=value,
group=sample, size = 4))+
geom_line(data=neander_frame,aes(x=factor(variable), y=value,
group=sample, size = 2))+
geom_point(data=rodeo_frame,aes(x=factor(variable), y=value,
group=sample, size = 4))+
geom_line(data=rodeo_frame,aes(x=factor(variable), y=value,
group=sample, size = 2))+
scale_x_discrete("Proportion") +
scale_y_continuous("Body Part")+
guides(size=FALSE)
}
The total sample
plotMaker(simToNeanderTotal,n2,n3)
The sample with out djd
plotMaker(simToNeanderWOdjd,d2,d3)
The sample without Shandidar
plotMaker(simToNeanderWOShan,s2,s3)
The sample without djd or Shan
plotMaker(simToNeanderWOdjdOrShan,sd,sd3)